This report includes a differential binding analysis (DBA) and genomic annotation of AR ChIP-Seq data sets in LNCaP cells that have been starved and subsequently treated with DHT for 4 different periods of time, including 30 minutes, 2 hours, 4 hours and 16 hours, or EtOH as a control.
AR ChIP-Seq data was processed using the “ngsane” ChIP-Seq pipeline with default parameters. Input Bam files have been aligned to hg38, sorted and deduplicated in ngsane pipeline with bowtie2 and samtools. Peak files were called in ngsane with MACS2 and default parameters.
library(DiffBind)
library(rgl)
library(DESeq2)
library(dplyr)
library(tidyverse)
library(rtracklayer)
library(profileplyr)
library(ChIPQC)
library(VennDiagram)
library(GenomicRanges)
library(ChIPpeakAnno)
library(colorspace)
library(readr)
library(knitr)
library(magick)
library(UpSetR)Load sample sheet from working directory
## SampleID Tissue Factor Condition Treatment Replicate
## 1 DHT_30mins_Rep1 LNCaP AR DHT DHT_30min 1
## 2 DHT_30mins_Rep2 LNCaP AR DHT DHT_30min 2
## 3 DHT_30mins_Rep3 LNCaP AR DHT DHT_30min 3
## 4 DHT_2hrs_Rep1 LNCaP AR DHT DHT_2hrs 1
## 5 DHT_2hrs_Rep2 LNCaP AR DHT DHT_2hrs 2
## 6 DHT_2hrs_Rep3 LNCaP AR DHT DHT_2hrs 3
## 7 DHT_4hrs_Rep1 LNCaP AR DHT DHT_4hrs 1
## 8 DHT_4hrs_Rep2 LNCaP AR DHT DHT_4hrs 2
## 9 DHT_4hrs_Rep3 LNCaP AR DHT DHT_4hrs 3
## 10 DHT_16hrs_Rep1 LNCaP AR DHT DHT_16hrs 1
## 11 DHT_16hrs_Rep2 LNCaP AR DHT DHT_16hrs 2
## 12 DHT_16hrs_Rep3 LNCaP AR DHT DHT_16hrs 3
## 13 EtOH_Rep1 LNCaP AR EtOH EtOH 1
## 14 EtOH_Rep2 LNCaP AR EtOH EtOH 2
## 15 EtOH_Rep3 LNCaP AR EtOH EtOH 3
## bamReads
## 1 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_05h_DHT_1.asd.bam
## 2 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_05h_DHT_2.asd.bam
## 3 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_05h_DHT_3.asd.bam
## 4 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_2h_DHT_1.asd.bam
## 5 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_2h_DHT_2.asd.bam
## 6 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_2h_DHT_3.asd.bam
## 7 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_4h_DHT_1.asd.bam
## 8 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_4h_DHT_2.asd.bam
## 9 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_4h_DHT_3.asd.bam
## 10 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_16h_DHT_1.asd.bam
## 11 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_16h_DHT_2.asd.bam
## 12 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_16h_DHT_3.asd.bam
## 13 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_Veh_1.asd.bam
## 14 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_Veh_2.asd.bam
## 15 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_Veh_3.asd.bam
## ControlID
## 1 Input_Pooled
## 2 Input_Pooled
## 3 Input_Pooled
## 4 Input_Pooled
## 5 Input_Pooled
## 6 Input_Pooled
## 7 Input_Pooled
## 8 Input_Pooled
## 9 Input_Pooled
## 10 Input_Pooled
## 11 Input_Pooled
## 12 Input_Pooled
## 13 Input_Pooled
## 14 Input_Pooled
## 15 Input_Pooled
## bamControl
## 1 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_Pooled_Input.asd.bam
## 2 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_Pooled_Input.asd.bam
## 3 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_Pooled_Input.asd.bam
## 4 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_Pooled_Input.asd.bam
## 5 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_Pooled_Input.asd.bam
## 6 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_Pooled_Input.asd.bam
## 7 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_Pooled_Input.asd.bam
## 8 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_Pooled_Input.asd.bam
## 9 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_Pooled_Input.asd.bam
## 10 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_Pooled_Input.asd.bam
## 11 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_Pooled_Input.asd.bam
## 12 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_Pooled_Input.asd.bam
## 13 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_Pooled_Input.asd.bam
## 14 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_Pooled_Input.asd.bam
## 15 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/reads/LNCaP_AR_Pooled_Input.asd.bam
## Peaks
## 1 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/peaks/LNCaP_AR_05h_DHT_1_peaks.broadPeak
## 2 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/peaks/LNCaP_AR_05h_DHT_2_peaks.broadPeak
## 3 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/peaks/LNCaP_AR_05h_DHT_3_peaks.broadPeak
## 4 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/peaks/LNCaP_AR_2h_DHT_1_peaks.broadPeak
## 5 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/peaks/LNCaP_AR_2h_DHT_2_peaks.broadPeak
## 6 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/peaks/LNCaP_AR_2h_DHT_3_peaks.broadPeak
## 7 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/peaks/LNCaP_AR_4h_DHT_1_peaks.broadPeak
## 8 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/peaks/LNCaP_AR_4h_DHT_2_peaks.broadPeak
## 9 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/peaks/LNCaP_AR_4h_DHT_3_peaks.broadPeak
## 10 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/peaks/LNCaP_AR_16h_DHT_1_peaks.broadPeak
## 11 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/peaks/LNCaP_AR_16h_DHT_2_peaks.broadPeak
## 12 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/peaks/LNCaP_AR_16h_DHT_3_peaks.broadPeak
## 13 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/peaks/LNCaP_AR_Veh_1_peaks.broadPeak
## 14 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/peaks/LNCaP_AR_Veh_2_peaks.broadPeak
## 15 /Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq/peaks/LNCaP_AR_Veh_3_peaks.broadPeak
## PeakCaller
## 1 bed
## 2 bed
## 3 bed
## 4 bed
## 5 bed
## 6 bed
## 7 bed
## 8 bed
## 9 bed
## 10 bed
## 11 bed
## 12 bed
## 13 bed
## 14 bed
## 15 bed
Make DBA object from sample sheet
##Create DBA from files and sample sheet
#setwd("/Library/Frameworks/R.framework/Versions/4.1/Resources/library/DiffBind/extra")
#basedir <- system.file("/Users/elyssaparbery/external/ScratchGeneral/ngsane/projects/16.11.2020_LNCaP_Hg38_AR_ChIPSeq", package="DiffBind")
#AR_ChIP <- dba(sampleSheet = samples, dir=basedir)
AR_ChIP## 15 Samples, 21583 sites in matrix (30843 total):
## ID Tissue Factor Condition Treatment Replicate Intervals
## 1 DHT_30mins_Rep1 LNCaP AR DHT DHT_30min 1 4522
## 2 DHT_30mins_Rep2 LNCaP AR DHT DHT_30min 2 10419
## 3 DHT_30mins_Rep3 LNCaP AR DHT DHT_30min 3 4745
## 4 DHT_2hrs_Rep1 LNCaP AR DHT DHT_2hrs 1 11796
## 5 DHT_2hrs_Rep2 LNCaP AR DHT DHT_2hrs 2 7962
## 6 DHT_2hrs_Rep3 LNCaP AR DHT DHT_2hrs 3 8904
## 7 DHT_4hrs_Rep1 LNCaP AR DHT DHT_4hrs 1 18863
## 8 DHT_4hrs_Rep2 LNCaP AR DHT DHT_4hrs 2 7173
## 9 DHT_4hrs_Rep3 LNCaP AR DHT DHT_4hrs 3 11274
## 10 DHT_16hrs_Rep1 LNCaP AR DHT DHT_16hrs 1 17107
## 11 DHT_16hrs_Rep2 LNCaP AR DHT DHT_16hrs 2 24749
## 12 DHT_16hrs_Rep3 LNCaP AR DHT DHT_16hrs 3 17863
## 13 EtOH_Rep1 LNCaP AR EtOH EtOH 1 63
## 14 EtOH_Rep2 LNCaP AR EtOH EtOH 2 247
## 15 EtOH_Rep3 LNCaP AR EtOH EtOH 3 163
Plot raw correlation heatmap based on peak occupancy:
Report peaksets
#retrieve common peak sets before DBA
#AR_ChIP_replicate_consensus <- dba.peakset(AR_ChIP,consensus = -DBA_REPLICATE)
#AR_ChIP_DHT_Consensus_Peaks <- dba.peakset(AR_ChIP_replicate_consensus, 16:19, minOverlap = 4, bRetrieve = TRUE)
#AR_ChIP_EtOH_Consensus_Peaks <- dba.peakset(AR_ChIP_replicate_consensus, 20, bRetrieve = TRUE)
#AR_ChIP_DHT_05h_Peaks <- dba.peakset(AR_ChIP_replicate_consensus, 1:3, minOverlap = 3, bRetrieve = TRUE)
#AR_ChIP_DHT_2h_Peaks<- dba.peakset(AR_ChIP_replicate_consensus, 4:6, minOverlap = 3, bRetrieve = TRUE)
#AR_ChIP_DHT_4h_Peaks<- dba.peakset(AR_ChIP_replicate_consensus, 7:9, minOverlap = 3, bRetrieve = TRUE)
#AR_ChIP_DHT_16h_Peaks<- dba.peakset(AR_ChIP_replicate_consensus, 10:12, minOverlap = 3, bRetrieve = TRUE)Visualise overlaps in raw input peaks across DHT time points:
#Calc. overlaps
#AR_ChIP_olap_all_Peaks <- findOverlapsOfPeaks(AR_ChIP_DHT_05h_Peaks, AR_ChIP_DHT_2h_Peaks, AR_ChIP_DHT_4h_Peaks, AR_ChIP_DHT_16h_Peaks, AR_ChIP_EtOH_Consensus_Peaks, minoverlap=1)
#write out files for each peakset
#setwd("/Volumes/griw/Cancer-Epigenetics/Projects/LNCaP_VCaP_DHT_Hi-C/Elyssa_2021_analysis_and_integration")
#write.table(AR_ChIP_DHT_Consensus_Peaks, file="AR_ChIP_DHT_Consensus_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)
#write.table(AR_ChIP_EtOH_Consensus_Peaks, file="AR_ChIP_EtOH_Consensus_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)
#write.table(AR_ChIP_DHT_05h_Peaks, file="AR_ChIP_DHT_05h_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)
#write.table(AR_ChIP_DHT_2h_Peaks, file="AR_ChIP_DHT_2h_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)
#write.table(AR_ChIP_DHT_4h_Peaks, file="AR_ChIP_DHT_4h_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)
#write.table(AR_ChIP_DHT_16h_Peaks, file="AR_ChIP_DHT_16h_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)Overlaps in raw AR peaks per DHT time point
Overlaps in raw AR peaks in EtOH control and each DHT time point
#### {-}
Using the parameter “minOverlap=2” will include peaks that are present in at least 2 replicates per condition and treatment. This will exclude any unique peaks, likely to be background noise.
#Count reads in peaks
#Create correlation heatmap, using affinity (read count) data
#Heatmap indicates correlation between the location and signal strength of peaks (number of reads/read depth) between samples
#Taking peaks that are found in 2 of 3 replicates
#AR_ChIP_Counts <- dba.count(AR_ChIP, minOverlap = 2)
AR_ChIP_Counts## 15 Samples, 21567 sites in matrix:
## ID Tissue Factor Condition Treatment Replicate Reads FRiP
## 1 DHT_30mins_Rep1 LNCaP AR DHT DHT_30min 1 26550007 0.02
## 2 DHT_30mins_Rep2 LNCaP AR DHT DHT_30min 2 27478958 0.02
## 3 DHT_30mins_Rep3 LNCaP AR DHT DHT_30min 3 26451349 0.02
## 4 DHT_2hrs_Rep1 LNCaP AR DHT DHT_2hrs 1 24171439 0.03
## 5 DHT_2hrs_Rep2 LNCaP AR DHT DHT_2hrs 2 29659386 0.02
## 6 DHT_2hrs_Rep3 LNCaP AR DHT DHT_2hrs 3 26116126 0.02
## 7 DHT_4hrs_Rep1 LNCaP AR DHT DHT_4hrs 1 25251177 0.04
## 8 DHT_4hrs_Rep2 LNCaP AR DHT DHT_4hrs 2 29061139 0.02
## 9 DHT_4hrs_Rep3 LNCaP AR DHT DHT_4hrs 3 28129241 0.02
## 10 DHT_16hrs_Rep1 LNCaP AR DHT DHT_16hrs 1 25636392 0.03
## 11 DHT_16hrs_Rep2 LNCaP AR DHT DHT_16hrs 2 26669872 0.04
## 12 DHT_16hrs_Rep3 LNCaP AR DHT DHT_16hrs 3 25367441 0.03
## 13 EtOH_Rep1 LNCaP AR EtOH EtOH 1 25451888 0.01
## 14 EtOH_Rep2 LNCaP AR EtOH EtOH 2 27397532 0.01
## 15 EtOH_Rep3 LNCaP AR EtOH EtOH 3 25657425 0.01
par(mar = rep(2, 4))
plot(AR_ChIP_Counts)Heatmap showing the correlation between the location and signal strength of peaks in each data set
Plot the average profile of AR signal over the peaks that have been considered in this DBA:
## Plotting...
Average AR signal over peaks input into the DBA analysis with replicates merged
Perform normalisation using a DESeq2 calculation of the geometric mean per peak across each sample. See the following links for more details: https://bioconductor.org/packages/devel/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf https://genomebiology.biomedcentral.com/articles/10.1186/s13059-014-0550-8
#AR_ChIP_Counts_Norm <- dba.normalize(AR_ChIP_Counts, normalize=DBA_NORM_NATIVE)Contrast by Treatment will contrast each DHT time point and EtOH.
#Create Diferential analysis by defining contrast design for TREATMENT
#AR_ChIP_Treatment <- dba.contrast(AR_ChIP_Counts_Norm, categories = DBA_TREATMENT, minMembers = 2)
AR_ChIP_Treatment## 15 Samples, 21567 sites in matrix:
## ID Tissue Factor Condition Treatment Replicate Reads FRiP
## 1 DHT_30mins_Rep1 LNCaP AR DHT DHT_30min 1 26550007 0.02
## 2 DHT_30mins_Rep2 LNCaP AR DHT DHT_30min 2 27478958 0.02
## 3 DHT_30mins_Rep3 LNCaP AR DHT DHT_30min 3 26451349 0.02
## 4 DHT_2hrs_Rep1 LNCaP AR DHT DHT_2hrs 1 24171439 0.03
## 5 DHT_2hrs_Rep2 LNCaP AR DHT DHT_2hrs 2 29659386 0.02
## 6 DHT_2hrs_Rep3 LNCaP AR DHT DHT_2hrs 3 26116126 0.02
## 7 DHT_4hrs_Rep1 LNCaP AR DHT DHT_4hrs 1 25251177 0.04
## 8 DHT_4hrs_Rep2 LNCaP AR DHT DHT_4hrs 2 29061139 0.02
## 9 DHT_4hrs_Rep3 LNCaP AR DHT DHT_4hrs 3 28129241 0.02
## 10 DHT_16hrs_Rep1 LNCaP AR DHT DHT_16hrs 1 25636392 0.03
## 11 DHT_16hrs_Rep2 LNCaP AR DHT DHT_16hrs 2 26669872 0.04
## 12 DHT_16hrs_Rep3 LNCaP AR DHT DHT_16hrs 3 25367441 0.03
## 13 EtOH_Rep1 LNCaP AR EtOH EtOH 1 25451888 0.01
## 14 EtOH_Rep2 LNCaP AR EtOH EtOH 2 27397532 0.01
## 15 EtOH_Rep3 LNCaP AR EtOH EtOH 3 25657425 0.01
##
## Design: [~Treatment] | 10 Contrasts:
## Factor Group Samples Group2 Samples2
## 1 Treatment DHT_30min 3 DHT_2hrs 3
## 2 Treatment DHT_30min 3 DHT_4hrs 3
## 3 Treatment DHT_30min 3 DHT_16hrs 3
## 4 Treatment DHT_30min 3 EtOH 3
## 5 Treatment DHT_2hrs 3 DHT_4hrs 3
## 6 Treatment DHT_2hrs 3 DHT_16hrs 3
## 7 Treatment DHT_2hrs 3 EtOH 3
## 8 Treatment DHT_4hrs 3 DHT_16hrs 3
## 9 Treatment DHT_4hrs 3 EtOH 3
## 10 Treatment EtOH 3 DHT_16hrs 3
The parameter “bBlacklist=TRUE” will detect the genome alignment and remove any peaks within blacklisted unmappable regions from the DBA.
#run DBA analysis
#AR_ChIP_Treatment_DBA <- dba.analyze(AR_ChIP_Treatment, bBlacklist=TRUE)
AR_ChIP_Treatment_DBA## 15 Samples, 21508 sites in matrix:
## ID Tissue Factor Condition Treatment Replicate Reads FRiP
## 1 DHT_30mins_Rep1 LNCaP AR DHT DHT_30min 1 26550007 0.01
## 2 DHT_30mins_Rep2 LNCaP AR DHT DHT_30min 2 27478958 0.02
## 3 DHT_30mins_Rep3 LNCaP AR DHT DHT_30min 3 26451349 0.01
## 4 DHT_2hrs_Rep1 LNCaP AR DHT DHT_2hrs 1 24171439 0.03
## 5 DHT_2hrs_Rep2 LNCaP AR DHT DHT_2hrs 2 29659386 0.02
## 6 DHT_2hrs_Rep3 LNCaP AR DHT DHT_2hrs 3 26116126 0.02
## 7 DHT_4hrs_Rep1 LNCaP AR DHT DHT_4hrs 1 25251177 0.03
## 8 DHT_4hrs_Rep2 LNCaP AR DHT DHT_4hrs 2 29061139 0.02
## 9 DHT_4hrs_Rep3 LNCaP AR DHT DHT_4hrs 3 28129241 0.02
## 10 DHT_16hrs_Rep1 LNCaP AR DHT DHT_16hrs 1 25636392 0.03
## 11 DHT_16hrs_Rep2 LNCaP AR DHT DHT_16hrs 2 26669872 0.04
## 12 DHT_16hrs_Rep3 LNCaP AR DHT DHT_16hrs 3 25367441 0.03
## 13 EtOH_Rep1 LNCaP AR EtOH EtOH 1 25451888 0.01
## 14 EtOH_Rep2 LNCaP AR EtOH EtOH 2 27397532 0.01
## 15 EtOH_Rep3 LNCaP AR EtOH EtOH 3 25657425 0.01
##
## Design: [~Treatment] | 10 Contrasts:
## Factor Group Samples Group2 Samples2 DB.DESeq2
## 1 Treatment DHT_30min 3 DHT_2hrs 3 203
## 2 Treatment DHT_30min 3 DHT_4hrs 3 3060
## 3 Treatment DHT_30min 3 DHT_16hrs 3 12804
## 4 Treatment DHT_30min 3 EtOH 3 10951
## 5 Treatment DHT_2hrs 3 DHT_4hrs 3 0
## 6 Treatment DHT_2hrs 3 DHT_16hrs 3 5670
## 7 Treatment DHT_2hrs 3 EtOH 3 15279
## 8 Treatment DHT_4hrs 3 DHT_16hrs 3 791
## 9 Treatment DHT_4hrs 3 EtOH 3 17376
## 10 Treatment EtOH 3 DHT_16hrs 3 20075
#op <- par(oma=c(5,7,1,1))
#dev.off()
#par(mar = rep(2, 4))
plot(AR_ChIP_Treatment_DBA, contrast=4)Correlation heatmap for DBA in 30 mins DHT vs EtOH contrast
#op <- par(oma=c(5,7,1,1))
#dev.off()
#par(mar = rep(2, 4))
plot(AR_ChIP_Treatment_DBA, contrast=7)Correlation heatmap for DBA in 2hrs DHT vs EtOH contrast
#op <- par(oma=c(5,7,1,1))
#dev.off()
#par(mar = rep(2, 4))
plot(AR_ChIP_Treatment_DBA, contrast=9)Correlation heatmap for DBA in 4hrs DHT vs EtOH contrast
Note: this will have negative values for peaks gained in 16hrs DHT when using dba.report include “bFlip=TRUE” to reorder the contrast so that DHT gained peaks have positive values.
#op <- par(oma=c(5,7,1,1))
#dev.off()
#par(mar = rep(2, 4))
plot(AR_ChIP_Treatment_DBA, contrast=10)Correlation heatmap for DBA in 16hrs DHT vs EtOH contrast
DHT Treatment DBA average profile over peaks called as gained and lost (in DHT treatment compared to EtOH):
30 minute DHT Treatment DBA average profile over peaks called as gained and lost (in DHT treatment compared to EtOH):
2 hour DHT Treatment DBA average profile over peaks called as gained and lost (in DHT treatment compared to EtOH):
4 hour DHT Treatment DBA average profile over peaks called as gained and lost (in DHT treatment compared to EtOH):
16 hour DHT Treatment DBA average profile over peaks called as gained and lost (in DHT treatment compared to EtOH):
#30 mins DHT DBA
#DHT_05hrs_DBA <- dba.report(AR_ChIP_Treatment_DBA, contrast = 4)
DHT_05hrs_DBA## GRanges object with 10951 ranges and 6 metadata columns:
## seqnames ranges strand | Conc Conc_DHT_30min
## <Rle> <IRanges> <Rle> | <numeric> <numeric>
## 5870 chr14 35929176-35929576 * | 6.91173 7.83579
## 15861 chr5 147911454-147911854 * | 7.55291 8.40846
## 19155 chr8 39832054-39832454 * | 6.71134 7.64054
## 4132 chr11 130025334-130025734 * | 6.47689 7.37827
## 13724 chr4 77564121-77564521 * | 6.30404 7.23172
## ... ... ... ... . ... ...
## 1073 chr1 119101643-119102043 * | 3.69945 4.09800
## 12119 chr3 71051880-71052280 * | 2.96759 3.44430
## 3954 chr11 107708876-107709276 * | 3.32195 3.76299
## 15173 chr5 76637536-76637936 * | 4.10292 4.50357
## 3029 chr10 118394591-118394991 * | 3.75294 4.15259
## Conc_EtOH Fold p-value FDR
## <numeric> <numeric> <numeric> <numeric>
## 5870 3.62604 3.98783 1.80081e-33 3.87318e-29
## 15861 5.16125 3.12515 4.67519e-32 5.02770e-28
## 19155 3.32725 4.05923 1.46604e-30 1.05105e-26
## 4132 3.55712 3.62426 1.48118e-29 7.96433e-26
## 13724 2.94998 4.02297 3.15766e-29 1.35830e-25
## ... ... ... ... ...
## 1073 3.14692 0.857613 0.0254069 0.0499179
## 12119 2.25074 1.013219 0.0254180 0.0499322
## 3954 2.68354 0.936510 0.0254203 0.0499322
## 15173 3.54632 0.849464 0.0254212 0.0499322
## 3029 3.19826 0.858083 0.0254370 0.0499589
## -------
## seqinfo: 26 sequences from an unspecified genome; no seqlengths
#2hrs DHT DBA
#DHT_2hrs_DBA <- dba.report(AR_ChIP_Treatment_DBA, contrast = 7)
DHT_2hrs_DBA## GRanges object with 15279 ranges and 6 metadata columns:
## seqnames ranges strand | Conc Conc_DHT_2hrs Conc_EtOH
## <Rle> <IRanges> <Rle> | <numeric> <numeric> <numeric>
## 5870 chr14 35929176-35929576 * | 7.59029 8.54332 3.62604
## 4132 chr11 130025334-130025734 * | 7.31110 8.25662 3.55712
## 15861 chr5 147911454-147911854 * | 8.06438 8.96458 5.16125
## 19885 chr8 123405488-123405888 * | 7.39286 8.34903 3.33040
## 19155 chr8 39832054-39832454 * | 7.42305 8.38023 3.32725
## ... ... ... ... . ... ... ...
## 17138 chr6 129938238-129938638 * | 3.55393 3.96703 2.97282
## 3886 chr11 97854035-97854435 * | 3.43700 3.86027 2.83542
## 7298 chr16 14181148-14181548 * | 3.49312 3.91855 2.88716
## 5033 chr12 101526694-101527094 * | 3.70636 4.12377 3.11664
## 8302 chr17 83101936-83102336 * | 3.35950 3.78130 2.76093
## Fold p-value FDR
## <numeric> <numeric> <numeric>
## 5870 4.74516 1.80427e-45 3.88062e-41
## 4132 4.55036 1.52129e-44 1.63599e-40
## 15861 3.72024 1.11104e-43 7.96541e-40
## 19885 4.82833 3.68649e-42 1.98222e-38
## 19155 4.85494 1.04159e-41 4.48049e-38
## ... ... ... ...
## 17138 0.929185 0.0354348 0.0498941
## 3886 0.955483 0.0354417 0.0499004
## 7298 0.941980 0.0354587 0.0499211
## 5033 0.928468 0.0354963 0.0499709
## 8302 0.939781 0.0355028 0.0499767
## -------
## seqinfo: 26 sequences from an unspecified genome; no seqlengths
#4hrs DHT DBA
#DHT_4hrs_DBA <- dba.report(AR_ChIP_Treatment_DBA, contrast = 9)
DHT_4hrs_DBA## GRanges object with 17376 ranges and 6 metadata columns:
## seqnames ranges strand | Conc Conc_DHT_4hrs Conc_EtOH
## <Rle> <IRanges> <Rle> | <numeric> <numeric> <numeric>
## 4132 chr11 130025334-130025734 * | 7.79474 8.75599 3.55712
## 3058 chr10 122121013-122121413 * | 7.80756 8.78212 2.96885
## 19885 chr8 123405488-123405888 * | 7.75916 8.72527 3.33040
## 19155 chr8 39832054-39832454 * | 7.76586 8.73221 3.32725
## 5870 chr14 35929176-35929576 * | 7.68679 8.64291 3.62604
## ... ... ... ... . ... ... ...
## 14297 chr4 155676373-155676773 * | 3.50732 3.93576 2.89515
## 7413 chr16 35131709-35132109 * | 3.53922 3.95717 2.94841
## 9173 chr2 27706222-27706622 * | 3.38367 3.78171 2.83211
## 8536 chr18 45010317-45010717 * | 3.11013 3.57034 2.43035
## 15415 chr5 102461653-102462053 * | 3.55364 3.97013 2.96576
## Fold p-value FDR
## <numeric> <numeric> <numeric>
## 4132 5.06808 1.89560e-54 4.07705e-50
## 3058 5.62738 1.00814e-48 8.95912e-45
## 19885 5.23132 1.24964e-48 8.95912e-45
## 19155 5.23538 1.21880e-47 6.55348e-44
## 5870 4.87486 2.48953e-47 1.07090e-43
## ... ... ... ...
## 14297 0.960092 0.0402267 0.0498040
## 7413 0.930127 0.0402439 0.0498225
## 9173 0.908855 0.0402576 0.0498366
## 8536 1.041325 0.0402901 0.0498740
## 15415 0.943536 0.0403823 0.0499851
## -------
## seqinfo: 27 sequences from an unspecified genome; no seqlengths
#16hrs DHT DBA
#DHT_16hrs_DBA <- dba.report(AR_ChIP_Treatment_DBA, contrast = 10, bFlip = TRUE)
DHT_16hrs_DBA## GRanges object with 20075 ranges and 6 metadata columns:
## seqnames ranges strand | Conc Conc_DHT_16hrs
## <Rle> <IRanges> <Rle> | <numeric> <numeric>
## 4132 chr11 130025334-130025734 * | 8.44337 9.41877
## 15861 chr5 147911454-147911854 * | 8.63831 9.57203
## 5870 chr14 35929176-35929576 * | 8.16683 9.13550
## 3058 chr10 122121013-122121413 * | 8.26546 9.24699
## 15608 chr5 123397491-123397891 * | 8.22586 9.19141
## ... ... ... ... . ... ...
## 9113 chr2 20196556-20196956 * | 3.59762 4.01122
## 19276 chr8 56796918-56797318 * | 3.93440 4.30241
## 16273 chr6 12331849-12332249 * | 3.34720 3.76785
## 20586 chr9 95306018-95306418 * | 3.24537 3.69252
## 16930 chr6 109296415-109296815 * | 3.92043 4.28395
## Conc_EtOH Fold p-value FDR
## <numeric> <numeric> <numeric> <numeric>
## 4132 3.55712 5.75037 4.43080e-69 9.52977e-65
## 15861 5.16125 4.35565 1.66966e-58 1.79556e-54
## 5870 3.62604 5.39068 5.88228e-57 4.21721e-53
## 3058 2.96885 6.12695 1.16389e-56 6.25821e-53
## 15608 3.82066 5.26438 2.59083e-55 1.11447e-51
## ... ... ... ... ...
## 9113 3.01553 0.951511 0.0465946 0.0499306
## 19276 3.43914 0.841820 0.0465991 0.0499329
## 16273 2.75095 0.973447 0.0466137 0.0499461
## 20586 2.59397 1.042409 0.0466496 0.0499820
## 16930 3.43328 0.824404 0.0466576 0.0499882
## -------
## seqinfo: 29 sequences from an unspecified genome; no seqlengths
Use the ‘dba.report’ function to pull out the differential lost (enriched in DHT treatment) and gained peaks (enriched in EtOH), as well as, non-differential constitutive AR binding sites (equally enriched in DHT treatment):
DBA 30 mins DHT vs EtOH:
#DHT_05hrs_DBA_Gained_AR <- dba.report(AR_ChIP_Treatment_DBA, contrast = 4, bGain=TRUE, DataType = DBA_DATA_GRANGES)
#DHT_05hrs_DBA_Lost_AR <- dba.report(AR_ChIP_Treatment_DBA, contrast = 4, bLoss=TRUE, DataType = DBA_DATA_GRANGES)
#DHT_05hrs_DBA_Constitutive_AR <- dba.report(AR_ChIP_Treatment_DBA, contrast = 4, bNotDB=TRUE , DataType = DBA_DATA_GRANGES)
#DHT_05hrs_DBA_Gained_AR_Peaks <- dba.peakset(DHT_05hrs_DBA_Gained_AR, 2, bRetrieve = TRUE)
#DHT_05hrs_DBA_Lost_AR_Peaks <- dba.peakset(DHT_05hrs_DBA_Lost_AR, 2, bRetrieve = TRUE)
#DHT_05hrs_DBA_Constitutive_AR_Peaks <- dba.peakset(DHT_05hrs_DBA_Constitutive_AR, 1, bRetrieve = TRUE)
#write out DBA peakset files
#setwd("/Volumes/griw/Cancer-Epigenetics/Projects/LNCaP_VCaP_DHT_Hi-C/Elyssa_2021_analysis_and_integration/DBA_Peaks")
#write.table(DHT_05hrs_DBA, file="DHT_05hrsvsEtOH_DBA_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)
#write.table(DHT_05hrs_DBA_Gained_AR_Peaks, file="DHT_05hrs_DBA_Gained_AR_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)
#write.table(DHT_05hrs_DBA_Lost_AR_Peaks, file="DHT_05hrs_DBA_Lost_AR_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)
#write.table(DHT_05hrs_DBA_Constitutive_AR_Peaks, file="DHT_05hrs_DBA_Constitutive_AR_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)DBA 2hrs DHT vs EtOH:
#DHT_2hrs_DBA_Gained_AR <- dba.report(AR_ChIP_Treatment_DBA, contrast = 7, bGain=TRUE, DataType = DBA_DATA_GRANGES)
#DHT_2hrs_DBA_Lost_AR <- dba.report(AR_ChIP_Treatment_DBA, contrast = 7, bLoss=TRUE, DataType = DBA_DATA_GRANGES)
#DHT_2hrs_DBA_Constitutive_AR <- dba.report(AR_ChIP_Treatment_DBA, contrast = 7, bNotDB=TRUE , DataType = DBA_DATA_GRANGES)
#DHT_2hrs_DBA_Gained_AR_Peaks <- dba.peakset(DHT_2hrs_DBA_Gained_AR, 2, bRetrieve = TRUE)
#DHT_2hrs_DBA_Lost_AR_Peaks <- No DB Lost peaks
#DHT_2hrs_DBA_Constitutive_AR_Peaks <- dba.peakset(DHT_2hrs_DBA_Constitutive_AR, 1, bRetrieve = TRUE)
#write out DBA peakset files
#setwd("/Volumes/griw/Cancer-Epigenetics/Projects/LNCaP_VCaP_DHT_Hi-C/Elyssa_2021_analysis_and_integration/DBA_Peaks")
#write.table(DHT_2hrs_DBA, file="DHT_2hrsvsEtOH_DBA_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)
#write.table(DHT_2hrs_DBA_Gained_AR_Peaks, file="DHT_2hrs_DBA_Gained_AR_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)
#write.table(DHT_2hrs_DBA_Constitutive_AR_Peaks, file="DHT_2hrs_DBA_Constitutive_AR_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)DBA 4hrs DHT vs EtOH:
#DHT_4hrs_DBA_Gained_AR <- dba.report(AR_ChIP_Treatment_DBA, contrast = 9, bGain=TRUE, DataType = DBA_DATA_GRANGES)
#DHT_4hrs_DBA_Lost_AR <- dba.report(AR_ChIP_Treatment_DBA, contrast = 9, bLoss=TRUE, DataType = DBA_DATA_GRANGES)
#DHT_4hrs_DBA_Constitutive_AR <- dba.report(AR_ChIP_Treatment_DBA, contrast = 9, bNotDB=TRUE , DataType = DBA_DATA_GRANGES)
#DHT_4hrs_DBA_Gained_AR_Peaks <- dba.peakset(DHT_4hrs_DBA_Gained_AR, 2, bRetrieve = TRUE)
#DHT_4hrs_DBA_Lost_AR_Peaks <- dba.peakset(DHT_4hrs_DBA_Lost_AR, 2, bRetrieve = TRUE)
#DHT_4hrs_DBA_Constitutive_AR_Peaks <- dba.peakset(DHT_4hrs_DBA_Constitutive_AR, 1, bRetrieve = TRUE)
#write out DBA peakset files
#setwd("/Volumes/griw/Cancer-Epigenetics/Projects/LNCaP_VCaP_DHT_Hi-C/Elyssa_2021_analysis_and_integration/DBA_Peaks")
#write.table(DHT_4hrs_DBA, file="DHT_4hrsvsEtOH_DBA_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)
#write.table(DHT_4hrs_DBA_Gained_AR_Peaks, file="DHT_4hrs_DBA_Gained_AR_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)
#write.table(DHT_4hrs_DBA_Lost_AR_Peaks, file="DHT_4hrs_DBA_Lost_AR_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)
#write.table(DHT_4hrs_DBA_Constitutive_AR_Peaks, file="DHT_4hrs_DBA_Constitutive_AR_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)DBA 16hrs DHT vs EtOH:
#DHT_16hrs_DBA_Gained_AR <- dba.report(AR_ChIP_Treatment_DBA, contrast = 10, bFlip = TRUE, bGain=TRUE, DataType = DBA_DATA_GRANGES)
#DHT_16hrs_DBA_Lost_AR <- dba.report(AR_ChIP_Treatment_DBA, contrast = 10, bFlip = TRUE, bLoss=TRUE, DataType = DBA_DATA_GRANGES)
#DHT_16hrs_DBA_Constitutive_AR <- dba.report(AR_ChIP_Treatment_DBA, contrast = 10, bFlip = TRUE, bNotDB=TRUE , DataType = DBA_DATA_GRANGES)
#DHT_16hrs_DBA_Gained_AR_Peaks <- dba.peakset(DHT_16hrs_DBA_Gained_AR, 2, bRetrieve = TRUE)
#DHT_16hrs_DBA_Lost_AR_Peaks <- dba.peakset(DHT_16hrs_DBA_Lost_AR, 2, bRetrieve = TRUE)
#DHT_16hrs_DBA_Constitutive_AR_Peaks <- dba.peakset(DHT_16hrs_DBA_Constitutive_AR, 1, bRetrieve = TRUE)
#write out DBA peakset files
#setwd("/Volumes/griw/Cancer-Epigenetics/Projects/LNCaP_VCaP_DHT_Hi-C/Elyssa_2021_analysis_and_integration/DBA_Peaks")
#write.table(DHT_16hrs_DBA, file="DHT_16hrsvsEtOH_DBA_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)
#write.table(DHT_16hrs_DBA_Gained_AR_Peaks, file="DHT_16hrs_DBA_Gained_AR_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)
#write.table(DHT_16hrs_DBA_Lost_AR_Peaks, file="DHT_16hrs_DBA_Lost_AR_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)
#write.table(DHT_16hrs_DBA_Constitutive_AR_Peaks, file="DHT_16hrs_DBA_Constitutive_AR_Peaks.bed", quote=F, sep="\t", row.names=F, col.names=F)PCA plot for all peak sets:
Visualisation of AR Treatment DBA 30 mins DHT vs EtOH contrast:
Visualisation of AR Treatment DBA 2hrs DHT vs EtOH contrast:
Visualisation of AR Treatment DBA 4hrs DHT vs EtOH contrast:
Visualisation of AR Treatment DBA 16hrs DHT vs EtOH contrast: Remember the “Lost” peaks here are gained in 16hrs DHT vs EtOH due to the order of groups in the contrast
These plots are including all differential sites, lost and gained, for each DHT time point vs EtOH.
Plot the overlap between DBA identified peaks in each DHT time point and average signal in overlapping and uniquely identified peaks:
#Venn of olaps in DBA lost and Stable sites at 144 and 72 hrs
#repObj <- dba.report(AR_ChIP_Treatment_DBA, contrast=c(4, 7, 9, 10), bDB=TRUE)
#overlaps <- dba.plotVenn(repObj, repObj$masks$DB)
#names(overlaps) <- c("only_DBA_05hrs", "only_DBA_2hrs", "only_DBA_4hrs", "only_DBA_16hrs", "05_and2hrs", "05_and4hrs", "05_and16hrs", "2_and4hrs", "2_and16hrs", "4_and16hrs", "not_05hrs", "not_2hrs", "not_4hrs", "not_16hrs", "All")#profiles <- dba.plotProfile(AR_ChIP_Treatment_DBA, sites=overlaps, samples=list(AR_ChIP_Treatment_DBA$masks$DHT_30min, AR_ChIP_Treatment_DBA$masks$DHT_2hrs, AR_ChIP_Treatment_DBA$masks$DHT_4hrs, AR_ChIP_Treatment_DBA$masks$DHT_16hrs))
#Enriched heatmap
library(profileplyr)
generateEnrichedHeatmap(profiles, include_group_annotation = TRUE, extra_annotation_columns = NULL)Comparison and intersection of differential AR peaks at each DHT timepoint. Heatmap including the average AR signal over regins of differential peaks per DHT treatment timepoint.
Overlapping sites from the DBA peaks called lost, gained and constitutive at each DHT time point:
#DataWrangling
library(GenomicRanges)
library(rtracklayer)
#make one common set of gained AR peaks from all granges (treatment compared to EtOH)
#all_Gained_AR_Peaks <- GRangesList("Gained_05hrs"=Gained_05hrs, "Gained_2hrs"=Gained_2hrs, "Gained_4hrs"=Gained_4hrs, "Gained_16hrs"=Gained_16hrs)
#Merged_Gained_AR_Peaks <- unlist(as(all_Gained_AR_Peaks, "GRangesList"), use.names = FALSE)
#gr.1 <- Gained_05hrs
#gr.2 <- Gained_2hrs
#gr.3 <- Gained_4hrs
#gr.4 <- Gained_16hrs
#base <- Merged_Gained_AR_Peaks
#z <- matrix(0,length(base),4)
#z[subjectHits(findOverlaps(gr.1, base)),1] <- 1
#z[subjectHits(findOverlaps(gr.2, base)),2] <- 1
#z[subjectHits(findOverlaps(gr.3, base)),3] <- 1
#z[subjectHits(findOverlaps(gr.4, base)),4] <- 1
#colnames(z) <- paste0("gr.", 1:4)
#mcols(base) <- z
base## GRanges object with 63678 ranges and 4 metadata columns:
## seqnames ranges strand | gr.1 gr.2 gr.3
## <Rle> <IRanges> <Rle> | <numeric> <numeric> <numeric>
## [1] chr1 591030-591430 * | 1 1 1
## [2] chr1 815429-815829 * | 1 1 1
## [3] chr1 2248450-2248850 * | 1 1 1
## [4] chr1 2410821-2411221 * | 1 1 1
## [5] chr1 2454011-2454411 * | 1 1 1
## ... ... ... ... . ... ... ...
## [63674] chrY 20929992-20930392 * | 1 1 1
## [63675] chrY 21334386-21334786 * | 1 1 1
## [63676] chrY 21631014-21631414 * | 0 1 1
## [63677] chrY 21641931-21642331 * | 0 1 1
## [63678] chrY 21672279-21672679 * | 1 1 1
## gr.4
## <numeric>
## [1] 1
## [2] 1
## [3] 1
## [4] 1
## [5] 1
## ... ...
## [63674] 1
## [63675] 1
## [63676] 1
## [63677] 1
## [63678] 1
## -------
## seqinfo: 29 sequences from an unspecified genome; no seqlengths
#Olaps_DHT_Timepoints <- base
#Olaps_AR_DBA_DHT_Timecourse = as(base, "data.frame")
#Olaps_AR_DBA_DHT_Timecourse <- Olaps_AR_DBA_DHT_Timecourse %>% rename(AR_30min_DHT = gr.1)
#Olaps_AR_DBA_DHT_Timecourse <- Olaps_AR_DBA_DHT_Timecourse %>% rename(AR_2hrs_DHT = gr.2)
#Olaps_AR_DBA_DHT_Timecourse <- Olaps_AR_DBA_DHT_Timecourse %>% rename(AR_4hrs_DHT = gr.3)
#Olaps_AR_DBA_DHT_Timecourse <- Olaps_AR_DBA_DHT_Timecourse %>% rename(AR_16hrs_DHT = gr.4)
#write.csv(Olaps_AR_DBA_DHT_Timecourse, file = "Olaps_AR_DBA_DHT_Timecourse.csv")#UpSet Plot
upset(Olaps_AR_DBA_DHT_Timecourse, sets = c("AR_30min_DHT", "AR_2hrs_DHT", "AR_4hrs_DHT", "AR_16hrs_DHT"), sets.bar.color = "#56B4E9",
order.by = "freq", empty.intersections = "on")Upset plot of common and unique AR peaks differentially gained at each DHT time point.
#Complex UpSet
library(ggplot2)
library(ComplexUpset)##
## Attaching package: 'ComplexUpset'
## The following object is masked from 'package:UpSetR':
##
## upset
upset(
Olaps_AR_DBA_DHT_Timecourse, c("AR_30min_DHT", "AR_2hrs_DHT", "AR_4hrs_DHT", "AR_16hrs_DHT"),
width_ratio=0.2,
group_by='sets',
queries=list(
upset_query(group='AR_30min_DHT', color='maroon'),
upset_query(group='AR_2hrs_DHT', color='blue'),
upset_query(group='AR_4hrs_DHT', color='orange'),
upset_query(group='AR_16hrs_DHT', color='purple'),
upset_query(set='AR_30min_DHT', fill='maroon'),
upset_query(set='AR_2hrs_DHT', fill='blue'),
upset_query(set='AR_4hrs_DHT', fill='orange'),
upset_query(set='AR_16hrs_DHT', fill='purple')
)
)## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
Complex Upset plot of common and unique AR peaks differentially gained at each DHT time point.
upset(Olaps_AR_DBA_DHT_Timecourse, c("AR_30min_DHT", "AR_2hrs_DHT", "AR_4hrs_DHT", "AR_16hrs_DHT"),
width_ratio=0.2,
queries=list(
upset_query(set='AR_30min_DHT', fill='maroon'),
upset_query(set='AR_2hrs_DHT', fill='blue'),
upset_query(set='AR_4hrs_DHT', fill='orange'),
upset_query(set='AR_16hrs_DHT', fill='purple')
)
)## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
Complex Upset plot of common and unique AR peaks differentially gained at each DHT time point.
library(ChIPpeakAnno)
library(dplyr)
library(tidyverse)
library(EnsDb.Hsapiens.v86)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(reshape)
library(readr)
library(tibble)wrangling data into the right format:
library(AnnotationHub)## Loading required package: BiocFileCache
## Loading required package: dbplyr
##
## Attaching package: 'dbplyr'
## The following objects are masked from 'package:dplyr':
##
## ident, sql
##
## Attaching package: 'AnnotationHub'
## The following object is masked from 'package:rtracklayer':
##
## hubUrl
## The following object is masked from 'package:Biobase':
##
## cache
library(ensembldb)
library(tibble)
library(dplyr)
#ah <- AnnotationHub()
#HomoSapien.v95.Db <- query(ah, pattern = c("Homo sapiens", "EnsDb", 95))
#HomoSapien.v95.E.Db <- HomoSapien.v95.Db[[1]]
#hg38annoData <- genes(HomoSapien.v95.E.Db)
#hg38annoData[1:2]
#genes <- hg38annoData## duplicated or NA names found.
## Rename all the names by numbers.
## duplicated or NA names found.
## Rename all the names by numbers.
## duplicated or NA names found.
## Rename all the names by numbers.
## duplicated or NA names found.
## Rename all the names by numbers.
Venn Diagram of overlaps in AR peaks differentially gained at each DHT time point and Hg38 gene annotations.
DBA_peaks_genes_venn <- makeVennDiagram(list(GENES_unique, Gained_05hrs, Const_05hrs), NameOfPeaks=c("Genes", "AR_30min_Gained", "AR_30min_Const"),
totalTest=100000,
fill=c("#FFC5D0", "#A4DDEF", "#E4CBF9"), # circle fill color
col=c("#E16A86", "#00A6CA", "#B675E0"), #circle border color
cat.col=c("#E16A86", "#00A6CA", "#B675E0"))Venn Diagram of overlaps in gained or constitutive AR peaks from the 30 minute DHT time point with Hg38 gene annotations.
DBA_peaks_genes_venn <- makeVennDiagram(list(GENES_unique, Gained_2hrs, Const_2hrs), NameOfPeaks=c("Genes", "AR_2hrs_Gained", "AR_2hrs_Const"),
totalTest=100000,
fill=c("#FFC5D0", "#E2D4A8", "#A8E1BF"), # circle fill color
col=c("#E16A86", "#AA9000", "#00AA5A"), #circle border color
cat.col=c("#E16A86", "#AA9000", "#00AA5A"))Venn Diagram of overlaps in gained or constitutive AR peaks from the 2 hour DHT time point with Hg38 gene annotations.
DBA_peaks_genes_venn <- makeVennDiagram(list(GENES_unique, Gained_4hrs, Const_4hrs), NameOfPeaks=c("Genes", "AR_4hrs_Gained", "AR_4hrs_Const"),
totalTest=100000,
fill=c("#FFC5D0", "#E2D4A8", "#A8E1BF"), # circle fill color
col=c("#E16A86", "#AA9000", "#00AA5A"), #circle border color
cat.col=c("#E16A86", "#AA9000", "#00AA5A"))Venn Diagram of overlaps in gained or constitutive AR peaks from the 4 hour DHT time point with Hg38 gene annotations.
DBA_peaks_genes_venn <- makeVennDiagram(list(GENES_unique, Gained_16hrs, Const_16hrs), NameOfPeaks=c("Genes", "AR_16hrs_Gained", "AR_16hrs_Const"),
totalTest=100000,
fill=c("#FFC5D0", "#E2D4A8", "#A8E1BF"), # circle fill color
col=c("#E16A86", "#AA9000", "#00AA5A"), #circle border color
cat.col=c("#E16A86", "#AA9000", "#00AA5A"))Venn Diagram of overlaps in gained or constitutive AR peaks from the 16 hour DHT time point with Hg38 gene annotations.
#Gained_AR_grl <- GRangesList("Gained_05hrs"=Gained_05hrs, "Gained_2hrs"=Gained_2hrs, "Gained_4hrs"=Gained_4hrs, "Gained_16hrs"=Gained_16hrs)Visualisation of the distribution of gained AR peaks in various genomic elements.
Visualisation of the proportions of AR peaks gained after 30 minutes of DHT treatment within various genomic elements.
Visualisation of the proportions of AR peaks gained after 2 hours of DHT treatment within various genomic elements.
Visualisation of the proportions of AR peaks gained after 4 hours of DHT treatment within various genomic elements.
Visualisation of the proportions of AR peaks gained after 16 hours of DHT treatment within various genomic elements.
Gene annotation for DBA Gained AR peaks:
#As shown from the distribution of aggregated peak numbers around TSS and the distribution of peaks in different of
#chromosome regions, most of the peaks locate around TSS. Therefore, it is reasonable to use annotatePeakInBatch or
#annoPeaks to annotate the peaks to the promoter regions of Hg38 genes. Promoters can be specified with bindingRegion.
#For the following example, promoter region is defined as upstream 2000 and downstream 500 from TSS (bindingRegion=c(-2000, 500)).
#seqlevelsStyle(hg38annoData) <- "UCSC"
Gained_05hrs_AR_Peaks.anno <- annotatePeakInBatch(Gained_05hrs,
AnnotationData=hg38annoData,
bindingRegion=c(-2000, 500),
output="overlapping",
maxgap = 1)
Gained_2hrs_AR_Peaks.anno <- annotatePeakInBatch(Gained_2hrs,
AnnotationData=hg38annoData,
bindingRegion=c(-2000, 500),
output="overlapping",
maxgap = 1)
Gained_4hrs_AR_Peaks.anno <- annotatePeakInBatch(Gained_4hrs,
AnnotationData=hg38annoData,
bindingRegion=c(-2000, 500),
output="overlapping",
maxgap = 1)
Gained_16hrs_AR_Peaks.anno <- annotatePeakInBatch(Gained_16hrs,
AnnotationData=hg38annoData,
bindingRegion=c(-2000, 500),
output="overlapping",
maxgap = 1)Write out annotated files:
library(org.Hs.eg.db)
Gained_05hrs_AR.anno <- addGeneIDs(Gained_05hrs_AR_Peaks.anno, "org.Hs.eg.db", IDs2Add = "entrez_id")
write.table(Gained_05hrs_AR.anno, file="Gained_05hrs_AR_Peaks_anno.bed", quote=F, sep="\t", row.names=F, col.names=T)
Gained_2hrs_AR.anno <- addGeneIDs(Gained_2hrs_AR_Peaks.anno, "org.Hs.eg.db", IDs2Add = "entrez_id")
write.table(Gained_2hrs_AR.anno, file="Gained_2hrs_AR_Peaks_anno.bed", quote=F, sep="\t", row.names=F, col.names=T)
Gained_4hrs_AR.anno <- addGeneIDs(Gained_4hrs_AR_Peaks.anno, "org.Hs.eg.db", IDs2Add = "entrez_id")
write.table(Gained_4hrs_AR.anno, file="Gained_4hrs_AR_Peaks_anno.bed", quote=F, sep="\t", row.names=F, col.names=T)
Gained_16hrs_AR.anno <- addGeneIDs(Gained_16hrs_AR_Peaks.anno, "org.Hs.eg.db", IDs2Add = "entrez_id")
write.table(Gained_16hrs_AR.anno, file="Gained_16hrs_AR_Peaks_anno.bed", quote=F, sep="\t", row.names=F, col.names=T)save.image("/Volumes/griw/Cancer-Epigenetics/Projects/LNCaP_VCaP_DHT_Hi-C/Elyssa_2021_analysis_and_integration/AR_ChIP_Analysis.RData")sessionInfo()## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] org.Hs.eg.db_3.13.0
## [2] AnnotationHub_3.0.1
## [3] BiocFileCache_2.0.0
## [4] dbplyr_2.1.1
## [5] reshape_0.8.8
## [6] TxDb.Hsapiens.UCSC.hg38.knownGene_3.13.0
## [7] EnsDb.Hsapiens.v86_2.99.0
## [8] ensembldb_2.16.3
## [9] AnnotationFilter_1.16.0
## [10] GenomicFeatures_1.44.0
## [11] AnnotationDbi_1.54.1
## [12] ComplexUpset_1.3.0
## [13] UpSetR_1.4.0
## [14] magick_2.7.2
## [15] knitr_1.33
## [16] colorspace_2.0-2
## [17] ChIPpeakAnno_3.26.2
## [18] VennDiagram_1.6.20
## [19] futile.logger_1.4.3
## [20] ChIPQC_1.28.0
## [21] profileplyr_1.8.0
## [22] rtracklayer_1.52.0
## [23] forcats_0.5.1
## [24] stringr_1.4.0
## [25] purrr_0.3.4
## [26] readr_2.0.0
## [27] tidyr_1.1.3
## [28] tibble_3.1.3
## [29] ggplot2_3.3.5
## [30] tidyverse_1.3.1
## [31] dplyr_1.0.7
## [32] DESeq2_1.32.0
## [33] rgl_0.107.10
## [34] DiffBind_3.2.4
## [35] SummarizedExperiment_1.22.0
## [36] Biobase_2.52.0
## [37] MatrixGenerics_1.4.0
## [38] matrixStats_0.60.0
## [39] GenomicRanges_1.44.0
## [40] GenomeInfoDb_1.28.1
## [41] IRanges_2.26.0
## [42] S4Vectors_0.30.0
## [43] BiocGenerics_0.38.0
##
## loaded via a namespace (and not attached):
## [1] apeglm_1.14.0
## [2] Rsamtools_2.8.0
## [3] rsvg_2.1.2
## [4] foreach_1.5.1
## [5] crayon_1.4.1
## [6] V8_3.4.2
## [7] MASS_7.3-54
## [8] nlme_3.1-152
## [9] backports_1.2.1
## [10] reprex_2.0.0
## [11] GOSemSim_2.18.0
## [12] rlang_0.4.11
## [13] XVector_0.32.0
## [14] readxl_1.3.1
## [15] irlba_2.3.3
## [16] limma_3.48.1
## [17] filelock_1.0.2
## [18] GOstats_2.58.0
## [19] BiocParallel_1.26.1
## [20] rjson_0.2.20
## [21] chipseq_1.42.0
## [22] bit64_4.0.5
## [23] glue_1.4.2
## [24] mixsqp_0.3-43
## [25] pheatmap_1.0.12
## [26] regioneR_1.24.0
## [27] base64url_1.4
## [28] DOSE_3.18.1
## [29] haven_2.4.1
## [30] tidyselect_1.1.1
## [31] XML_3.99-0.6
## [32] GenomicAlignments_1.28.0
## [33] org.Mm.eg.db_3.13.0
## [34] xtable_1.8-4
## [35] magrittr_2.0.1
## [36] evaluate_0.14
## [37] cli_3.0.1
## [38] zlibbioc_1.38.0
## [39] hwriter_1.3.2
## [40] rstudioapi_0.13
## [41] bslib_0.2.5.1
## [42] GreyListChIP_1.24.0
## [43] fastmatch_1.1-3
## [44] lambda.r_1.2.4
## [45] treeio_1.16.1
## [46] shiny_1.6.0
## [47] xfun_0.24
## [48] clue_0.3-59
## [49] multtest_2.48.0
## [50] cluster_2.1.2
## [51] caTools_1.18.2
## [52] tidygraph_1.2.0
## [53] KEGGREST_1.32.0
## [54] interactiveDisplayBase_1.30.0
## [55] ggrepel_0.9.1
## [56] ape_5.5
## [57] Biostrings_2.60.1
## [58] png_0.1-7
## [59] withr_2.4.2
## [60] bitops_1.0-7
## [61] ggforce_0.3.3
## [62] RBGL_1.68.0
## [63] plyr_1.8.6
## [64] cellranger_1.1.0
## [65] GSEABase_1.54.0
## [66] coda_0.19-4
## [67] pillar_1.6.2
## [68] gplots_3.1.1
## [69] GlobalOptions_0.1.2
## [70] cachem_1.0.5
## [71] TxDb.Dmelanogaster.UCSC.dm3.ensGene_3.2.2
## [72] fs_1.5.0
## [73] GetoptLong_1.0.5
## [74] vctrs_0.3.8
## [75] ellipsis_0.3.2
## [76] generics_0.1.0
## [77] EnrichedHeatmap_1.22.0
## [78] tools_4.1.0
## [79] munsell_0.5.0
## [80] tweenr_1.0.2
## [81] fgsea_1.18.0
## [82] DelayedArray_0.18.0
## [83] httpuv_1.6.1
## [84] fastmap_1.1.0
## [85] compiler_4.1.0
## [86] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [87] GenomeInfoDbData_1.2.6
## [88] gridExtra_2.3
## [89] InteractionSet_1.20.0
## [90] edgeR_3.34.0
## [91] lattice_0.20-44
## [92] AnnotationForge_1.34.0
## [93] later_1.2.0
## [94] utf8_1.2.2
## [95] soGGi_1.24.0
## [96] jsonlite_1.7.2
## [97] scales_1.1.1
## [98] TxDb.Rnorvegicus.UCSC.rn4.ensGene_3.2.2
## [99] graph_1.70.0
## [100] tidytree_0.3.4
## [101] genefilter_1.74.0
## [102] lazyeval_0.2.2
## [103] promises_1.2.0.1
## [104] doParallel_1.0.16
## [105] latticeExtra_0.6-29
## [106] R.utils_2.10.1
## [107] TxDb.Mmusculus.UCSC.mm9.knownGene_3.2.2
## [108] brew_1.0-6
## [109] checkmate_2.0.0
## [110] rmarkdown_2.9
## [111] cowplot_1.1.1
## [112] BSgenome_1.60.0
## [113] igraph_1.2.6
## [114] survival_3.2-11
## [115] numDeriv_2016.8-1.1
## [116] yaml_2.2.1
## [117] plotrix_3.8-1
## [118] ashr_2.2-47
## [119] SQUAREM_2021.1
## [120] htmltools_0.5.1.1
## [121] memoise_2.0.0
## [122] VariantAnnotation_1.38.0
## [123] BiocIO_1.2.0
## [124] locfit_1.5-9.4
## [125] graphlayouts_0.7.1
## [126] batchtools_0.9.15
## [127] viridisLite_0.4.0
## [128] digest_0.6.27
## [129] assertthat_0.2.1
## [130] mime_0.11
## [131] rappdirs_0.3.3
## [132] tiff_0.1-8
## [133] futile.options_1.0.1
## [134] emdbook_1.3.12
## [135] RSQLite_2.2.7
## [136] amap_0.8-18
## [137] data.table_1.14.0
## [138] blob_1.2.2
## [139] R.oo_1.24.0
## [140] preprocessCore_1.54.0
## [141] labeling_0.4.2
## [142] splines_4.1.0
## [143] Cairo_1.5-12.2
## [144] ProtGenerics_1.24.0
## [145] RCurl_1.98-1.3
## [146] broom_0.7.9
## [147] hms_1.1.0
## [148] modelr_0.1.8
## [149] BiocManager_1.30.16
## [150] shape_1.4.6
## [151] aplot_0.0.6
## [152] sass_0.4.0
## [153] Rcpp_1.0.7
## [154] mvtnorm_1.1-2
## [155] circlize_0.4.13
## [156] enrichplot_1.12.2
## [157] fansi_0.5.0
## [158] tzdb_0.1.2
## [159] truncnorm_1.0-8
## [160] Nozzle.R1_1.1-1
## [161] ChIPseeker_1.28.3
## [162] R6_2.5.0
## [163] lifecycle_1.0.0
## [164] formatR_1.11
## [165] ShortRead_1.50.0
## [166] curl_4.3.2
## [167] TxDb.Hsapiens.UCSC.hg18.knownGene_3.2.2
## [168] jquerylib_0.1.4
## [169] DO.db_2.9
## [170] Matrix_1.3-4
## [171] TxDb.Celegans.UCSC.ce6.ensGene_3.2.2
## [172] qvalue_2.24.0
## [173] RColorBrewer_1.1-2
## [174] iterators_1.0.13
## [175] DOT_0.1
## [176] htmlwidgets_1.5.3
## [177] polyclip_1.10-0
## [178] biomaRt_2.48.2
## [179] crosstalk_1.1.1
## [180] shadowtext_0.0.8
## [181] rvest_1.0.1
## [182] ComplexHeatmap_2.8.0
## [183] patchwork_1.1.1
## [184] bdsmatrix_1.3-4
## [185] codetools_0.2-18
## [186] invgamma_1.1
## [187] lubridate_1.7.10
## [188] GO.db_3.13.0
## [189] gtools_3.9.2
## [190] prettyunits_1.1.1
## [191] R.methodsS3_1.8.1
## [192] gtable_0.3.0
## [193] DBI_1.1.1
## [194] highr_0.9
## [195] httr_1.4.2
## [196] KernSmooth_2.23-20
## [197] stringi_1.7.3
## [198] progress_1.2.2
## [199] reshape2_1.4.4
## [200] farver_2.1.0
## [201] annotate_1.70.0
## [202] viridis_0.6.1
## [203] rGREAT_1.24.0
## [204] Rgraphviz_2.36.0
## [205] ggtree_3.0.2
## [206] xml2_1.3.2
## [207] rvcheck_0.1.8
## [208] bbmle_1.0.23.1
## [209] systemPipeR_1.26.3
## [210] boot_1.3-28
## [211] restfulr_0.0.13
## [212] geneplotter_1.70.0
## [213] Category_2.58.0
## [214] BiocVersion_3.13.1
## [215] bit_4.0.4
## [216] scatterpie_0.1.6
## [217] jpeg_0.1-9
## [218] TxDb.Mmusculus.UCSC.mm10.knownGene_3.10.0
## [219] ggraph_2.0.5
## [220] pkgconfig_2.0.3